norm = "SCT"
if (norm == "log"){assay = "RNA"}
if (norm == "SCT"){assay = "SCT"}
Created an integrated seurat object with 4 assays. Using subset of cells from sample 07
DefaultAssay(seu) <- assay
if (norm == "log"){nPCA = 20}
if (norm == "SCT"){nPCA = 30}
seu <- RunPCA(seu, features = VariableFeatures(seu) )
## Warning: Cannot add objects with duplicate keys (offending key: PC_),
## setting key to 'pca_'
seu <- RunTSNE(seu, dims = 1:nPCA )
seu <- FindNeighbors(seu, dims = 1:nPCA)
## Computing nearest neighbor graph
## Computing SNN
seu <- FindClusters(seu, verbose = FALSE)
Using 5000 most variably expressed decidua genes as reference.
Each dataset is clustered separately
# how many cores on HPC?
con <- Conos$new(seu_list, n.cores=4)
con$plotPanel(clustering="multilevel", use.local.clusters=T, title.size=6)
con$buildGraph(k=30, k.self=5, space='PCA', ncomps=30, n.odgenes=2000,
matching.method='mNN', metric='angular',
score.component.variance=TRUE, verbose=TRUE)
## found 0 out of 1 cached PCA space pairs ... running 1 additional PCA space pairs
## Warning in scaledMatricesSeuratV3(so.objs = samples, data.type =
## data.type, : Seurat doesn't support variance scaling
## . done
## inter-sample links using mNN
## Warning in scaledMatricesSeuratV3(so.objs = samples, data.type =
## data.type, : Seurat doesn't support variance scaling
## . done
## local pairs local pairs done
## building graph ..done
con$findCommunities(method=leiden.community, resolution=1)
Clusters correspond between datasets
con$plotPanel(font.size=4)
cellannot <- seu_list[["decidua"]]$annotation
# propagating labels from reference
new.label.probabilities <- con$propagateLabels(labels = cellannot, verbose=T, fixed.initial.labels=T)
new.label.probabilities <- new.label.probabilities[complete.cases(new.label.probabilities),]
#pdf(paste0("/icgc/dkfzlsdf/analysis/B210/Evelin/plots/",norm,"_uncertainty.pdf"))
con$plotGraph(colors=(1 - apply(new.label.probabilities, 1, max)), show.legend=T, legend.title="Uncertainty", legend.pos=c(1, 0))
## Estimating embeddings.
#dev.off()
Clusters labelled based on reference
#pdf(paste0("/icgc/dkfzlsdf/analysis/B210/Evelin/plots/",norm,"_conos_labelled_clusters.pdf"))
con$plotPanel(groups = new.annot)
#dev.off()
Projecting conos predictions onto initial UMAP from Seurat
DimPlot(seu_full, reduction = "log.umap", group.by = "conos", cells = which(!is.na(seu_full$conos)))
DimPlot(seu_full, reduction = "log.umap", group.by = "conos", cells = which(seu_full$conos %in% c("SCT", "VCT", "EVT", "fFB1", "fFB2"))) + labs(title = "predicted fetal cells")
DimPlot(seu_full, reduction = "log.umap", group.by = "conos", cells = which(!(is.na(seu_full$conos) | seu_full$conos %in% c("SCT", "VCT", "EVT", "fFB1", "fFB2")))) + labs(title = "predicted maternal cells")
seu_full$celltype = 0
seu_full$celltype[which(seu_full$SCT.clusters == 14 | seu_full$log.clusters == 15) ]="endothelial"
seu_full$celltype[which(seu_full$SCT.clusters %in% c(0, 2, 3, 6, 9, 12)) ]="epithelial"
seu_full$celltype[which(seu_full$SCT.clusters %in% c(1, 5)) ]="stromal"
seu_full$celltype[which(seu_full$SCT.clusters %in% c(11)) ]="smooth muscle"
seu_full$celltype[which(seu_full$celltype==0) ]=NA
plot_grid(DimPlot(seu_full, reduction = "log.umap", group.by = "celltype", cells = which(!is.na(seu_full$celltype))), DimPlot(seu_full, reduction = "SCT.umap", group.by = "celltype", cells = which(!is.na(seu_full$celltype))), DimPlot(seu_full, reduction = "integrated.umap", group.by = "celltype", cells = which(!is.na(seu_full$celltype))), ncol = 3)
DimPlot(seu_full, reduction = "log.umap", group.by = "celltype", cells = which(!is.na(seu_full$celltype)))
Scmap uses SingleCellExperiment objects as input
seu = seu_list[["menstrual"]]
rowData = data.frame(feature_symbol = row.names(seu[[assay]]))
colData = data.frame(Barcode = colnames(seu[[assay]]))
cluster_info = data.frame(cluster = Idents(seu))
cluster_info$Barcode = row.names(cluster_info)
colData = left_join(colData, cluster_info, by = "Barcode")
## Warning: Column `Barcode` joining factor and character vector, coercing
## into character vector
assays = return_assay(seu, assay)
se = SummarizedExperiment(assays = list(counts = assays[[1]], logcounts = assays[[2]]), rowData = rowData, colData = colData)
rm(seu)
menstrual = as(se, "SingleCellExperiment")
load(file = paste0("/icgc/dkfzlsdf/analysis/B210/Evelin/sce_RData/",norm,"_decidua_5000.RData"))
Feature selection
decidua <- selectFeatures(decidua, suppress_plot = FALSE, 1000)
decidua <- indexCluster(decidua, cluster_col = "annotation")
Variable genes
#pdf(paste0("/icgc/dkfzlsdf/analysis/B210/Evelin/plots/",assay,"50_variable_decidua.pdf"))
pheatmap(metadata(decidua)$scmap_cluster_index, show_rownames = FALSE)
#dev.off()
Celltypes in reference dataset
df = as.data.frame(table(colData(decidua)$annotation))
ggplot(df, aes(x = reorder(Var1, -Freq), y = Freq)) + geom_col() +
theme_bw() + theme(axis.text.x = element_text(angle = 45)) +
labs(title = "decidua reference celltypes")
#ggsave(paste0("/icgc/dkfzlsdf/analysis/B210/Evelin/plots/",assay,"_reference_celltypes.pdf"))
df = as.data.frame(table(as.data.frame(scmapCluster_results)$decidua))
ggplot(df, aes(x = reorder(Var1, -Freq), y = Freq)) + geom_col() +
theme_bw() + theme(axis.text.x = element_text(angle = 45)) +
labs(title = "projected celltypes")
#ggsave(paste0("/icgc/dkfzlsdf/analysis/B210/Evelin/plots/",assay,"_celltypes_all_genes.pdf"))